## ----------------------------------------------------------------------------
## Title: qualitative.R
## Author: Elsa Voytas
## Created: April-10-2024
## Last updated: April-13-2025
## ----------------------------------------------------------------------------
library(readr)
library(dplyr)
library(grid)
library(eulerr)
library(broom)
library(forcats)
library(tidyverse)
library(tidytext)
library(tm)
library(wordcloud)
library(stm)
library(RColorBrewer)
library(NLP)
library(lubridate)
library(forcats)
library(stringr)
library(purrr)
library(readr)
library(tidyr)
library(tibble)
library(ggplot2)

# Load the coded testimony data and cross-tabulate the three content
# indicators (physical, psychological, economic).
testimonies <- readr::read_csv("Input/testimony_data.csv")
tablesums <- testimonies %>%
  dplyr::select("physical", "psychological", "economic") %>%
  ftable() %>%
  tidy()

# Region sizes for the Euler diagram, entered as literal counts.
# NOTE(review): these presumably mirror the cross-tabulation in `tablesums`;
# confirm they are still in sync whenever the input file changes.
intersections <- euler(
  c(
    "Economic" = 50,
    "Economic&Physical" = 36,
    "Economic&Psychological" = 20,
    "Economic&Physical&Psychological" = 67,
    "Physical&Psychological" = 116,
    "Physical" = 191,
    "Psychological" = 39
  )
)

# Figure 9 ("Contents of victim testimonies")
# Euler diagram of the three content categories. eulerr's own quantity labels
# are set to font size 0; the counts and percentages are placed by hand with
# grid.text() at fixed positions instead.
pdf(file = "Output/figure9.pdf", width = 14, height = 7)

# plot() on an euler object only builds the diagram; it is drawn when the
# result is printed. Print explicitly so the figure is also rendered when this
# script is run through source(), which does not auto-print top-level values.
print(
  plot(
    intersections,
    labels = list(
      labels = c("Economic", "Physical", "Psychological"),
      fontsize = 18
    ),
    quantities = list(fontsize = 0)
  )
)

# One row per hand-placed annotation: text plus its x/y position on the page.
# The "20" / "(2%)" pair is split over two rows so it is drawn on two lines.
figure9_annotations <- tibble::tribble(
  ~label,                  ~x,    ~y,
  "No mention: 485 (48%)", 0.83,  0.12,
  "39 (4%)",               0.415, 0.857,
  "50 (5%)",               0.275, 0.42,
  "191 (19%)",             0.625, 0.19,
  "20",                    0.34,  0.69,
  "(2%)",                  0.333, 0.663,
  "116 (12%)",             0.57,  0.71,
  "36 (4%)",               0.38,  0.3,
  "67 (7%)",               0.42,  0.58
)
figure9_text_style <- gpar(col = "black", fontsize = 18, fontface = 1)

for (i in seq_len(nrow(figure9_annotations))) {
  grid::grid.text(
    figure9_annotations$label[i],
    x = figure9_annotations$x[i],
    y = figure9_annotations$y[i],
    gp = figure9_text_style
  )
}
dev.off()

print("Figure 9 complete")

# Figure S4 ("Contents of victim testimonies") -- part of Dataverse supplement
# Same diagram as Figure 9, restricted to rows flagged with subset == 1.
# NOTE(review): the variable `subset` shadows base::subset(); the name is kept
# because other code may refer to it.
subset <- filter(testimonies, subset == 1)
tablesums_subset <- subset %>%
  dplyr::select("physical", "psychological", "economic") %>%
  ftable() %>%
  tidy()

# Region sizes entered as literal counts.
# NOTE(review): presumably taken from `tablesums_subset` -- confirm.
subset_intersections <- euler(
  c(
    "Economic" = 3,
    "Economic&Physical" = 0,
    "Economic&Psychological" = 0,
    "Economic&Physical&Psychological" = 4,
    "Physical&Psychological" = 5,
    "Physical" = 5,
    "Psychological" = 4
  )
)

pdf(file = "Output/figures4.pdf", width = 12, height = 6)
# plot() on an euler object only builds the diagram; print it explicitly so
# the figure is also drawn when the script is run through source().
print(
  plot(
    subset_intersections,
    labels = list(labels = TRUE, fontsize = 18),
    quantities = list(fontsize = 0)
  )
)
dev.off()
print("Figure S4 complete")

# Figure 10 ("Bar plot of qualitative interview themes")
# Horizontal bar chart of theme percentages, with themes ordered by
# Percentage so the bars are sorted along the y axis.
interviews <- read.csv("Input/interview_plot.csv")
interviews <- interviews %>%
  mutate(theme = fct_reorder(theme, Percentage, .desc = FALSE))

figure10 <- ggplot(
  interviews,
  aes(x = round(Percentage, digits = 2), y = theme)
) +
  geom_bar(
    stat = "identity", color = "black", fill = "#66CC99", width = .8
  ) +
  # Percentage label for each bar, nudged left by a fixed 5.6 x-axis units.
  geom_text(
    aes(label = paste0(round(Percentage, digits = 2), "%")),
    vjust = 1, color = "black", size = 6,
    position = position_nudge(x = -5.6)
  ) +
  theme_bw() +
  labs(y = "", x = "Percentage") +
  theme(
    panel.grid.major.y = element_blank(),
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 17)
  ) +
  scale_x_continuous(expand = expansion(mult = c(0, .2)))

# Pass the plot explicitly instead of relying on ggsave()'s last_plot()
# default, so the saved figure cannot silently become a different plot.
ggsave(
  "Output/figure10.pdf",
  plot = figure10, width = 12, height = 5, units = "in"
)
print("Figure 10 complete")

#-------------------------------------------------------------------------------
# Producing supplemental qualitative analysis
# Figures S1-S3
# ------------------------------------------------------------------------------

# Figure S1: word cloud built from the `full` text column of the testimonies.
testimonies <- read_csv("Input/testimonies.csv")

# One row per token.
tidy_dat <- testimonies %>%
  tidytext::unnest_tokens(word, full)

# Stop-word lists: tidytext's built-in table and tm's Spanish list, the latter
# put into the same word/lexicon layout. tibble() replaces the deprecated
# data_frame(); the former bind_rows() around a single table was a no-op.
stop_words <- tidytext::stop_words
spanish_stop_words <- tibble(
  word = tm::stopwords("spanish"),
  lexicon = "custom"
)

# Word frequencies after dropping Spanish stop words. The join key is given
# explicitly so the match is on `word` only.
tidy <- tidy_dat %>%
  anti_join(spanish_stop_words, by = "word") %>%
  count(word, sort = TRUE)

pdf("Output/figures1.pdf")
set.seed(5432)
wordcloud(
  words = tidy$word, freq = tidy$n, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  scale = c(3.5, 0.25)
)

dev.off()
print("Figure S1 complete")

# Figure S2: word cloud built from the `translated` text column.
tidy_dat <- testimonies %>%
  tidytext::unnest_tokens(word, translated)

# Word frequencies after dropping both the Spanish list and tidytext's
# built-in stop words; join keys are explicit so the match is on `word` only.
tidy <- tidy_dat %>%
  anti_join(spanish_stop_words, by = "word") %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)

pdf("Output/figures2.pdf")
# Seed set once, immediately before the randomised word layout (the earlier
# duplicate call had no random operation between it and this one).
set.seed(5432)
wordcloud(
  words = tidy$word, freq = tidy$n, min.freq = 1,
  max.words = 200, random.order = FALSE, rot.per = 0.35,
  scale = c(3.5, 0.25)
)
dev.off()
print("Figure S2 complete")

# STM
# Figure S3: structural topic model fitted on the `translated` text column.
# The former `tidy %>% cast_sparse(word, n)` step was removed: its result was
# never used, and it placed the count column in the column position, so the
# matrix it built was not a document-term matrix.

# Pre-process the raw text with stm's own pipeline, keeping the full
# testimonies table as document metadata.
processed <- textProcessor(testimonies$translated, metadata = testimonies)
out <- prepDocuments(processed$documents, processed$vocab, processed$meta)
docs <- out$documents
vocab <- out$vocab
meta <- out$meta

# 20-topic model with spectral initialisation, capped at 75 EM iterations.
set.seed(5432)
fit <- stm(
  documents = docs, vocab = vocab,
  K = 20,
  max.em.its = 75, data = meta,
  init.type = "Spectral"
)

pdf("Output/figures3.pdf")
figures3 <- plot(fit, type = "summary", xlim = c(0, .4))
dev.off()
print("Figure S3 complete")


